{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "apart-basis",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import geopandas as gpd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "warming-witch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICE-Spark baseline comparison\n",
    "# ST join task for S2 and Is2 datasets (1 month, 3 months, and 6 months)\n",
    "# try sjoin first, then the inequality join later"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "pediatric-effect",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Sentinel shapefile into a GeoDataFrame; later cells filter it on its 'S2_unix' column.\n",
    "Sentinel_gdf = gpd.read_file('../../Metadata/for_baselines/sentinel_6_month.shp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "442d064f-8854-4029-a4ad-36eacb5c325a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "configured-colombia",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the I2 shapefile into a GeoDataFrame; the preview below shows the columns\n",
    "# 'Descriptio', 'is_timesta' and 'geometry' (LINESTRING).\n",
    "I2_gdf = gpd.read_file('../../Metadata/for_baselines/is2_6month.shp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "bigger-enhancement",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Descriptio</th>\n",
       "      <th>is_timesta</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RGT 935 26-Feb-2020 00:00:15 DOY 57 Cycle 6</td>\n",
       "      <td>1582693200</td>\n",
       "      <td>LINESTRING (-124.09 -1.0742, -124.19 0.013965)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>RGT 936 26-Feb-2020 00:00:32 DOY 57 Cycle 6</td>\n",
       "      <td>1582693200</td>\n",
       "      <td>LINESTRING (-124.19 0.013965, -124.58 3.8548, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>RGT 936 26-Feb-2020 00:24:32 DOY 57 Cycle 6</td>\n",
       "      <td>1582693200</td>\n",
       "      <td>LINESTRING (97.637 87.317, 69.029 84.053, 61.1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>RGT 936 26-Feb-2020 01:00:32 DOY 57 Cycle 6</td>\n",
       "      <td>1582696800</td>\n",
       "      <td>LINESTRING (38.443 -49.498, 37.852 -53.296, 37...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>RGT 937 26-Feb-2020 01:34:50 DOY 57 Cycle 6</td>\n",
       "      <td>1582696800</td>\n",
       "      <td>LINESTRING (-147.82 0.052877, -148.2 3.8938, -...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9924</th>\n",
       "      <td>RGT 5 26-Mar-2020 22:00:41 DOY 86 Cycle 7</td>\n",
       "      <td>1585274400</td>\n",
       "      <td>LINESTRING (-114.02 -39.346, -114.48 -35.531, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9925</th>\n",
       "      <td>RGT 6 26-Mar-2020 22:10:58 DOY 86 Cycle 7</td>\n",
       "      <td>1585274400</td>\n",
       "      <td>LINESTRING (-118.23 0.03382, -118.61 3.8747, -...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9926</th>\n",
       "      <td>RGT 6 26-Mar-2020 22:34:58 DOY 86 Cycle 7</td>\n",
       "      <td>1585274400</td>\n",
       "      <td>LINESTRING (103.28 87.302, 74.929 84.034, 67.0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9927</th>\n",
       "      <td>RGT 6 26-Mar-2020 23:00:58 DOY 86 Cycle 7</td>\n",
       "      <td>1585278000</td>\n",
       "      <td>LINESTRING (48.914 -11.325, 48.523 -15.16, 48....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9928</th>\n",
       "      <td>RGT 7 26-Mar-2020 23:45:15 DOY 86 Cycle 7</td>\n",
       "      <td>1585278000</td>\n",
       "      <td>LINESTRING (-141.84 0.0084401, -142.23 3.8494,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9929 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       Descriptio  is_timesta  \\\n",
       "0     RGT 935 26-Feb-2020 00:00:15 DOY 57 Cycle 6  1582693200   \n",
       "1     RGT 936 26-Feb-2020 00:00:32 DOY 57 Cycle 6  1582693200   \n",
       "2     RGT 936 26-Feb-2020 00:24:32 DOY 57 Cycle 6  1582693200   \n",
       "3     RGT 936 26-Feb-2020 01:00:32 DOY 57 Cycle 6  1582696800   \n",
       "4     RGT 937 26-Feb-2020 01:34:50 DOY 57 Cycle 6  1582696800   \n",
       "...                                           ...         ...   \n",
       "9924    RGT 5 26-Mar-2020 22:00:41 DOY 86 Cycle 7  1585274400   \n",
       "9925    RGT 6 26-Mar-2020 22:10:58 DOY 86 Cycle 7  1585274400   \n",
       "9926    RGT 6 26-Mar-2020 22:34:58 DOY 86 Cycle 7  1585274400   \n",
       "9927    RGT 6 26-Mar-2020 23:00:58 DOY 86 Cycle 7  1585278000   \n",
       "9928    RGT 7 26-Mar-2020 23:45:15 DOY 86 Cycle 7  1585278000   \n",
       "\n",
       "                                               geometry  \n",
       "0        LINESTRING (-124.09 -1.0742, -124.19 0.013965)  \n",
       "1     LINESTRING (-124.19 0.013965, -124.58 3.8548, ...  \n",
       "2     LINESTRING (97.637 87.317, 69.029 84.053, 61.1...  \n",
       "3     LINESTRING (38.443 -49.498, 37.852 -53.296, 37...  \n",
       "4     LINESTRING (-147.82 0.052877, -148.2 3.8938, -...  \n",
       "...                                                 ...  \n",
       "9924  LINESTRING (-114.02 -39.346, -114.48 -35.531, ...  \n",
       "9925  LINESTRING (-118.23 0.03382, -118.61 3.8747, -...  \n",
       "9926  LINESTRING (103.28 87.302, 74.929 84.034, 67.0...  \n",
       "9927  LINESTRING (48.914 -11.325, 48.523 -15.16, 48....  \n",
       "9928  LINESTRING (-141.84 0.0084401, -142.23 3.8494,...  \n",
       "\n",
       "[9929 rows x 3 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the loaded I2 GeoDataFrame (the notebook renders a truncated table).\n",
    "I2_gdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "superior-mining",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Benchmark configuration: the evaluation period, expressed in UTC.\n",
    "\n",
    "import datetime\n",
    "import time\n",
    "from datetime import timezone\n",
    "\n",
    "# Month count; used in the names and text of the output files written later.\n",
    "number_of_month = 6\n",
    "\n",
    "# Timezone-aware bounds of the period; the filter cells compare against them with <= / >=.\n",
    "start_date_time = datetime.datetime(2020, 1, 1, 0, 0, tzinfo=timezone.utc)\n",
    "end_date_time = datetime.datetime(2020, 6, 30, 23, 59, tzinfo=timezone.utc)\n",
    "\n",
    "# The same bounds as POSIX timestamps (float seconds).\n",
    "start_date_time_unix, end_date_time_unix = (\n",
    "    bound.timestamp() for bound in (start_date_time, end_date_time)\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea861165-7d15-4b06-8039-8f07f24ac4cf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "informed-killer",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the Sentinel rows whose S2_unix lies inside the selected period (both bounds inclusive).\n",
    "# .copy() makes the subset an independent frame, so adding columns to it later does not\n",
    "# raise SettingWithCopyWarning.\n",
    "Sentinel_gdf_selected_duration = Sentinel_gdf[(Sentinel_gdf['S2_unix'] <= end_date_time_unix) & (Sentinel_gdf['S2_unix'] >= start_date_time_unix)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "intelligent-complexity",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict the I2 rows to the selected period; between() includes both end points by default.\n",
    "I2_gdf_selected_duration = I2_gdf.loc[I2_gdf['is_timesta'].between(start_date_time_unix, end_date_time_unix)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44716dc6-e059-4730-9311-5410c3af5e64",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "accompanied-remains",
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_pandas_sjoin(delay_length):\n",
    "    \"\"\"Run the GeoPandas spatio-temporal join baseline and record its wall time.\n",
    "\n",
    "    The Sentinel rows are spatially joined to the I2 rows with ``gpd.sjoin``\n",
    "    (no extra arguments, so the library defaults apply). Only pairs whose\n",
    "    ``is_timesta`` lies within ``delay_length`` hours of ``S2_unix`` (bounds\n",
    "    inclusive) are kept.\n",
    "\n",
    "    Reads the notebook globals ``Sentinel_gdf_selected_duration``,\n",
    "    ``I2_gdf_selected_duration`` and ``number_of_month``.\n",
    "\n",
    "    Args:\n",
    "        delay_length: Half-width of the temporal window, in hours.\n",
    "\n",
    "    Side effects:\n",
    "        Writes the matched pairs to ``S2I2_coincident_<months>month.csv`` and\n",
    "        the elapsed seconds to\n",
    "        ``ICESpark_baseline_<delay>_delay_<months>_month_geopandas.txt``.\n",
    "    \"\"\"\n",
    "    window_seconds = delay_length * 3600\n",
    "    # Build the window bounds on a new frame with assign(): the input is a filtered\n",
    "    # slice, and writing columns onto it in place raised SettingWithCopyWarning and\n",
    "    # left extra columns behind on the notebook-level variable.\n",
    "    sentinel_windowed = Sentinel_gdf_selected_duration.assign(\n",
    "        S2_time_ma=Sentinel_gdf_selected_duration['S2_unix'] + window_seconds,\n",
    "        S2_time_mi=Sentinel_gdf_selected_duration['S2_unix'] - window_seconds,\n",
    "    )\n",
    "\n",
    "    # perf_counter() is monotonic, so the interval is not skewed by system clock changes.\n",
    "    start_spatial_time = time.perf_counter()\n",
    "    s2_i2_sjoin = gpd.sjoin(sentinel_windowed, I2_gdf_selected_duration)\n",
    "\n",
    "    # Temporal predicate: the I2 timestamp must fall inside the Sentinel window.\n",
    "    within_window = (s2_i2_sjoin['is_timesta'] <= s2_i2_sjoin['S2_time_ma']) & (s2_i2_sjoin['is_timesta'] >= s2_i2_sjoin['S2_time_mi'])\n",
    "    s2_i2_sjoin = s2_i2_sjoin[within_window]\n",
    "    # NOTE(review): the CSV name does not include delay_length, so runs with different\n",
    "    # delays overwrite each other -- confirm whether that is intended.\n",
    "    s2_i2_sjoin[['s2_index', 'Descriptio', 'is_timesta']].to_csv('S2I2_coincident_{}month.csv'.format(number_of_month))\n",
    "    # Stop the clock before opening the report file. As before, the measured span\n",
    "    # covers the spatial join, the temporal filter and the CSV export.\n",
    "    elapsed_seconds = time.perf_counter() - start_spatial_time\n",
    "\n",
    "    with open('ICESpark_baseline_{}_delay_{}_month_geopandas.txt'.format(str(delay_length), str(number_of_month)), 'w') as f:\n",
    "        f.write('time for {} month ST join using geopandas:'.format(str(number_of_month)))\n",
    "        f.write(str(elapsed_seconds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "surgical-skating",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "unexpected-screw",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.11/site-packages/geopandas/geodataframe.py:1819: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  super().__setitem__(key, value)\n",
      "/opt/anaconda3/lib/python3.11/site-packages/geopandas/geodataframe.py:1819: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  super().__setitem__(key, value)\n"
     ]
    }
   ],
   "source": [
    "# Run the baseline once per temporal delay (in hours); only the 6-hour delay is listed here.\n",
    "for delay_i in [6]:\n",
    "    test_pandas_sjoin(delay_i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "developmental-atlanta",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "026d7762-7bc2-4183-90d6-d2d697ed437e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
